import pandas as pd;
import numpy as np;
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go
# Load the artist table from disk and preview five randomly drawn rows.
artists = pd.read_csv("data/artists.csv")
artists.sample(n=5)
| | id | followers | genres | name | popularity | TmpGenres | macro genre |
|---|---|---|---|---|---|---|---|
| 359002 | 3wEWsZuvUW4C3Vuw9tniiZ | 285.0 | ['aikatsu'] | せな・るか from AIKATSU☆STARS! | 9 | ["'aikatsu'"] | alternative |
| 940563 | 7eBWOD2EmhG5KBFAB5xiSb | 85.0 | [] | Bryon Tosoff | 7 | [''] | alternative |
| 413354 | 17dzS68S7iPOOgy9NFtPC7 | 1.0 | [] | Lorraine Palmer O'Reilly | 0 | [''] | alternative |
| 112405 | 02VrJiGGcC0lZRC7CgJzpz | 0.0 | [] | Micah Byrns | 0 | [''] | alternative |
| 336950 | 3PJxzQhkwTuxueC4LC6PLg | 6.0 | ['classic iskelma'] | Aira-Anneli | 0 | ["'classic iskelma'"] | alternative |
def categorize():
    """Assign every artist a coarse "macro genre" and rewrite the CSV in place.

    Reads ``data/artists.csv``, splits the textual ``genres`` value of each
    row into the ``TmpGenres`` column, derives the ``macro genre`` column
    from it, and writes the table back to the same path without the index.
    """
    # Macro genres that can be assigned; each name is matched as a substring.
    macroGeneres = (
        "alternative", "rock", "metal", "pop", "rap", "punk", "jazz",
        "reggae", "soul", "polka", "country", "electronic", "funk",
        "hip hop", "r&b", "folk", "house", "techno", "trance", "indie",
        "blues", "instrument", "disco", "deep", "hardcore", "wave", "trap",
        "other",
    )

    def getMacroCategory(genres):
        """Return the macro genre matched by the most entries of *genres*.

        A macro genre scores one point for every entry that contains its
        name as a substring, so an entry such as "trap" scores for both
        "trap" and "rap". Ties go to the alphabetically first macro genre.
        When no entry matches anything the result is "other".
        """
        counts = {
            macro: sum(macro in genre for genre in genres)
            for macro in macroGeneres
        }
        # Highest score first, then alphabetical order to break ties.
        best = min(counts, key=lambda macro: (-counts[macro], macro))
        # Without this guard an unmatched artist would receive whichever
        # macro genre happens to sort first instead of "other".
        return best if counts[best] > 0 else "other"

    artists = pd.read_csv("data/artists.csv")
    # Each genres value is text such as "['rock', 'pop']": drop the brackets
    # and split on commas. Entries keep their quotes and leading spaces,
    # which does not affect the substring matching above.
    artists["TmpGenres"] = artists["genres"].apply(
        lambda raw: raw.strip("][").split(",")
    )
    artists["macro genre"] = artists["TmpGenres"].apply(getMacroCategory)
    artists.to_csv("data/artists.csv", index=False)
# categorize()  # uncomment to rebuild the "macro genre" column in the CSV
artists = pd.read_csv("data/artists.csv")

# Number of artists per macro genre, drawn as horizontal bars on a log scale.
genereDF = artists.groupby(["macro genre"], as_index=False).size()
fig = px.histogram(genereDF, y="macro genre", x="size", log_x=True)
fig.update_yaxes(categoryorder="total ascending")
# x carries the artist counts and y the genre names, so title them that way.
fig.layout["xaxis"]["title"] = "Number of Artists"
fig.layout["yaxis"]["title"] = "Genres"
fig.layout.yaxis.dtick = 0.5
fig.show()
# Popularity against follower count for a random sample of at most 10,000
# artists. Capping at the table length avoids the ValueError that sample()
# raises when asked for more rows than exist.
tmpData = artists.sample(min(10000, len(artists)))
fig = px.scatter(tmpData, x="followers", y="popularity", hover_name="name", log_x=True)
fig.show()
def plotTopArtists(frame, genre, topN=10):
    """Build a chart of the most popular artists within one macro genre.

    Args:
        frame: Artist table with "macro genre", "popularity" and "name"
            columns.
        genre: Value of the "macro genre" column to keep.
        topN: Number of artists to show, taken from the highest
            popularity downwards.

    Returns:
        The plotly figure; the caller decides when to show it.
    """
    inGenre = frame.loc[frame["macro genre"] == genre]
    ranked = inGenre.sort_values(by="popularity", ascending=False)
    chart = px.histogram(ranked[:topN], x="popularity", y="name")
    chart.update_yaxes(categoryorder="total ascending")
    chart.layout["yaxis"]["title"] = "Artist"
    chart.layout["xaxis"]["title"] = "Popularity"
    chart.layout.yaxis.dtick = 0.5
    return chart


# Ten most popular metal artists.
fig = plotTopArtists(artists, "metal")
fig.show()

# Ten most popular rap artists.
fig = plotTopArtists(artists, "rap")
fig.show()
# Highest popularity reached by any artist of each macro genre. A single
# grouped max replaces filtering and sorting the whole table once per genre.
maxPerGenere = artists.groupby("macro genre")["popularity"].max().to_dict()
popularityDF = pd.DataFrame(
    {
        "Genere": list(maxPerGenere.keys()),
        "Popularity": list(maxPerGenere.values()),
    }
).sort_values(by="Popularity", ascending=False)
fig = px.histogram(popularityDF, x="Genere", y="Popularity")
# The genres are on the x axis here, so the category ordering is set there;
# descending matches the sort order of the frame above.
fig.update_xaxes(categoryorder="total descending")
fig.layout["yaxis"]["title"] = "Popularity"
fig.layout["xaxis"]["title"] = "Genre"
fig.show()